import requests
import json
import fake_useragent

'''
http://47.102.196.162/improve/news/?tag_id=0&page=5
http://47.102.196.162/improve/news/1166/

'''
if __name__ == '__main__':
    # Randomized User-Agent so requests look like a regular browser.
    ua = fake_useragent.UserAgent()

    # List endpoint: paged news index, filtered by tag_id.
    target_url = 'http://47.102.196.162/improve/news/'
    # Detail endpoint: full article payload for a single news id.
    detail_url = 'http://47.102.196.162/improve/news/{news_id}/'

    headers = {
        'User-Agent': ua.random,
    }

    # Highest tag id to crawl; tags 0..tag_num inclusive are fetched.
    tag_num = 1

    # Fetch every article under each tag.
    for one_tag_id in range(0, tag_num + 1):
        # First request only discovers how many pages this tag has.
        first_resp = requests.get(
            target_url,
            params={'tag_id': one_tag_id},
            headers=headers,
            timeout=10,  # don't hang forever on a dead server
        )
        # Guard against a missing 'data' key or missing 'total_pages'
        # (the original chained .get() calls would raise AttributeError).
        first_data = first_resp.json().get('data') or {}
        total_page_num = first_data.get('total_pages') or 0

        print('this tag has {} pages........'.format(total_page_num))

        # Walk every page of this tag (pages are 1-based).
        for one_page in range(1, total_page_num + 1):
            page_resp = requests.get(
                target_url,
                params={'tag_id': one_tag_id, 'page': one_page},
                headers=headers,
                timeout=10,
            )
            news_list = (page_resp.json().get('data') or {}).get('news') or []

            # Fetch the full detail payload for each article on the page.
            for one_new in news_list:
                one_new_id = one_new.get('id')
                print("spider is getting the news which id is {}".format(one_new_id))

                detail_resp = requests.get(
                    detail_url.format(news_id=one_new_id),
                    headers=headers,
                    timeout=10,
                )
                print(detail_resp.content.decode('utf-8'))